# Set up RMarkdown working directory & some options
# Working directory for easy sourcing of helper functions
#R_BASE_DIR <- "/home/jkor/work_local/projects/seamless/github/R"
#repo_dir <- '/home/jussi/work_local/projects/core/github/R'
R_BASE_DIR <- params$r_base_dir
knitr::opts_knit$set(root.dir = normalizePath(R_BASE_DIR))
# Set general options
knitr::opts_knit$set(echo = TRUE,
fig.align = "center")
Plot ERPs and related infromation. Plot extracted amplitudes and latencies (features) as well. A preliminary analysis to see if the are any effects in the data.
Configurations for this analysis. To see them, tap open the code block.
## Setup
source("init_jkor.R")
source("tools_dataload.R")
require(cowplot)
#require(plotly)
require(DT)
# PROJECT_ROOT_PATH <- '/ukko/projects/SeamlessCare_2015-16/'
# CTAP_ROOT_ID <- 'ctap_tswitch' # 'ctap_tswitch', 'ctap_tswitch_uupu'
PROJECT_ROOT_PATH <- params$project_root_path
CTAP_ROOT_ID <- params$ctap_root_id
PATHS <- create_ctap_paths(PROJECT_ROOT_PATH, CTAP_ROOT_ID)
#PATHS_LOCAL <- create_ctap_paths('/home/jkor/Dropbox/Shared/seamless_jkor/ukonkuva', CTAP_ROOT_ID)
CHANNEL_ARR <- c('Fz','Cz','Pz')
PATH_PATTERNS <- c("src-1_ica", "src-1_blinkICfix", "src-1_blinkICremove")
BRANCHES <- c('asis', 'BLICfix', 'BLICremove')
MBI_CUTOFF <- 1.49
FIG_WIDTH <- 15
OUT_WIDTH_FC <- 15*100
Setup summary:
The raw data consists of subject average ERPs stored as HDF5 files by the CTAP analysis pipe. These are collected into an R -dataset by a separate script tswitch_prepare_erpfeat.R. Also ERP features such as amplitude and latency are extracted by the same script and stored to disk for fast access.
Let’s first load all this data. We also split casename into several columns for practicality.
savedir <- PATHS$data_root
# subject average ERPs
savefile <- file.path(savedir, 'TS03_erp_sbjave.rds')
erp_curves <- readRDS(savefile)
# split casename into a set of classifier variables
erp_curves <- erp_curves %>%
separate(casename , c('subject','part','session','measurement'),
remove = F) %>%
mutate(sbjnr = as.integer(
str_replace_all(subject, '[A-Z]*', '')
))
# ERP features
savefile <- file.path(savedir, 'TS03_erpfeat.rds')
erpfeat <- readRDS(savefile)
Filter out unwanted subjects:
sbjnr is less than 100 for seamless: burnout studyif(CTAP_ROOT_ID == 'ctap_tswitch'){
erp_curves <- erp_curves %>%
filter(sbjnr < 100)
erpfeat <- erpfeat %>%
filter(sbjnr < 100)
}
These are averages of all available trials for a given subject and ERP. The raw ERP data looks like this:
DT::datatable(head(erp_curves))
The columns are:
channel: electrode position according to 10-20 system
time: data point latency in ms
subject: test subject id
part: the second position from casename
session: the third position from casename
measurement: the fourth position from casename
amplitude: ERP amplitude in microvolts
trial_count: ERP trial count (included for convenience)
erpid: ERP identification id
sbjnr: test subject number
Let’s look at the how many different levels there are for erpid and channel. The following are data row counts for different values of the classifying variable:
erp_curves %>% group_by(erpid) %>% count()
## # A tibble: 5 x 2
## # Groups: erpid [5]
## erpid n
## <chr> <int>
## 1 CR 86400
## 2 CRp1 85050
## 3 CRp2 85050
## 4 CRp2to9 86400
## 5 CRp3to9 86400
erp_curves %>% group_by(channel) %>% count()
## # A tibble: 3 x 2
## # Groups: channel [3]
## channel n
## <fct> <int>
## 1 Fz 143100
## 2 Cz 143100
## 3 Pz 143100
There are in total 64 different measurements included as identified by subject.
The ERP feature data has been extracted from the subject average ERPs. It looks like this:
DT::datatable(head(erpfeat))
The columns are:
erpid: ERP identification id
comp: ERP component within one ERP wave, the “peak” selected
channel: electrode position according to 10-20 system
subject: test subject id
part: the second position from casename
session: the third position from casename
measurement: the fourth position from casename
variable: ERP feature
value: feature value
sbjnr: test subject number
Let’s look at how many different comp-erpid combinations we have and how many data rows there are per combination:
erpfeat %>%
filter(variable == 'amplitude') %>%
group_by(channel, comp, erpid) %>%
count()
## # A tibble: 30 x 4
## # Groups: channel, comp, erpid [30]
## channel comp erpid n
## <fct> <chr> <chr> <int>
## 1 Fz P3a CR 64
## 2 Fz P3a CRp1 63
## 3 Fz P3a CRp2 63
## 4 Fz P3a CRp2to9 64
## 5 Fz P3a CRp3to9 64
## 6 Fz P3b CR 64
## 7 Fz P3b CRp1 63
## 8 Fz P3b CRp2 63
## 9 Fz P3b CRp2to9 64
## 10 Fz P3b CRp3to9 64
## # ... with 20 more rows
So there is P3a and P3b for each ERP identified by erpid.
The definitions of each ERP are:
| Task | ERP id | Included stimuli | Description |
|---|---|---|---|
| TS01 | CR | 110, 120 | Number task |
| TS02 | CR | 210, 220 | Letter task |
| TS03 | CR | 111-119, 121-129, 211-219, 221-229 | Switch task: trials at all positions |
| TS03 | CRp1 | 111, 121, 211, 221 | Switch task: switch trials position 1 |
| TS03 | CRp2 | 112, 122, 212, 222 | Switch task: switch trials position 2 |
| TS03 | CRp2to9 | 112-119, 122-129, 212-219, 222-229 | Switch task: repetition trials positions 2-9 |
| TS03 | CRp3to9 | 113-119, 123-129, 213-219, 223-229 | Switch task: repetition trials positions 3-9 |
Here we load MBI scores for each subject and add two classification variables: a two-level and a three-level MBI classifier.
sbjd <- load_subject_features(PROJECT_ROOT_PATH)
## Loading required package: feather
# Return three-level MBI class for a scalar MBI value
get_mbi_3class_single <- function(x){
if (x <= 1.49){
'low'
} else if ((1.49 < x) & (x <= 3.49)){
'mid'
} else {
'high'
}
}
# Return three-level MBI class for a vector of MBI values
# To be used with mutate().
get_mbi_3class <- function(mbi_vec){
sapply(mbi_vec, get_mbi_3class_single)
}
# Add class factors
sbjd <- sbjd %>%
mutate(mbi_class = ordered(MBIScore > MBI_CUTOFF,
levels = c(T,F),
labels = c('high','low')),
mbi_class3 = ordered(get_mbi_3class(MBIScore),
levels = c('high','mid','low'))
)
The table sbjd contains information on both UUPU and SEAMLESS subjects. The data as a searchable table looks like this:
DT::datatable(sbjd)
Let’s see what kind of trial counts exist in the data:
ds <- erp_curves %>%
filter(channel == 'Pz', time == 0)
p <- ggplot(ds) +
geom_dotplot(aes(x = trial_count), binwidth = 5) +
facet_grid(erpid ~ .) +
scale_x_continuous(breaks = seq(0, 600, 50)) +
theme_linedraw() +
labs(x = 'Subject trial count for ERP', y = 'Count of subjects')
p
Most of these seem to be above 50 which should suffice for a large ERP such as the P3.
A zoomed-in version of the figure above:
p <- ggplot(ds) +
geom_dotplot(aes(x = trial_count), binwidth = 5) +
facet_wrap(erpid ~ ., scales = 'free_x', ncol = 2) +
theme_linedraw() +
labs(x = 'Subject trial count for ERP', y = 'Count of subjects')
p
Let’s also see if the high MBI score subjects have also low trial counts? This is suspected as their ERPs will later prove to be small…
ds <- erp_curves %>%
filter(channel == 'Pz', time == 0)
# get MBI classes
ds <- left_join(ds,
select(sbjd, c('subject', 'mbi_class', 'mbi_class3', 'MBIScore')),
by = 'subject')
p <- ggplot(ds) +
geom_point(aes(x = trial_count, y = MBIScore, colour = mbi_class3)) +
facet_wrap(erpid ~ ., scales = 'free_x') +
theme_light() +
scale_colour_brewer(palette = 'Set1') +
labs(x = 'Subject trial count for ERP', y = 'MBI score')
p
The answer is no: high MBI subjects have decent trial counts.
The grand average is the average ERP averaged over all test subjects. Here are grand average ERPs for each erpid separately:
my_gaerp_plot <- function(pd){
cur_erpid <- unique(pd$erpid)
pd <- pd %>%
mutate(value = amplitude,
ds = subject)
gapd <- pd %>%
group_by(channel, time) %>%
summarise(n=n(), mean=mean(amplitude)) %>%
ungroup()
titlestr = sprintf('%s GA ERP, N_sbj = %d', cur_erpid, unique(gapd$n))
p <- ggplot.gaerp(gapd,
#ylimits = c(10, -5),
titlestr = titlestr) #plot
p
# gp <- ggplotly(p)
# savedir_loc <- file.path(RESULTS_LOCAL_BASEPATH, 'figs', 'GA')
# dir.create(savedir_loc, showWarnings = F, recursive = T)
# savefile <- file.path(savedir_loc, sprintf('GAERP_%s.html', cur_erpid))
# htmlwidgets::saveWidget(gp, savefile)
# note: using local save directory as saving to Ukko fails
# for unknown reason.
#tibble(success = T, plot = list(p))
}
erpid_arr <- unique(erp_curves$erpid)
p_list <- vector(length(erpid_arr), mode = 'list')
i = 1
for (id in erpid_arr){
p <- my_gaerp_plot(filter(erp_curves, erpid == id))
p_list[i] <- list(p)
i <- i +1
}
cowplot::plot_grid(plotlist = p_list, ncol = 2)
The two components are clearly visible. Because the ERPs are visual, we use channel Pz for finding the GA peak position and for extracting the ERP features.
# Merge MBI information into ERP curve data
mbiage <- aggregate_subject_features(sbjd, mbi_th = 1.49)
erp_curves_mbi <- left_join(erp_curves,
mbiage %>% select(sbjnr, mbigroup),
by = 'sbjnr')
erp_curves_mbi <- left_join(erp_curves_mbi,
sbjd %>% select(subject, mbi_class, mbi_class3),
by = 'subject')
my_gaerp_plot_bymbi <- function(gapd, class_variable){
ext.val <- max(abs(gapd$mean_amp))
ylimits <- c(ceiling(ext.val), -ceiling(ext.val))
p <- ggplot(data = gapd) +
geom_line(aes_string(x = 'time', y = 'mean_amp',
group = class_variable, color = class_variable)) +
geom_hline(yintercept = 0, size = 0.3) +
geom_vline(xintercept = 0, size = 0.3) +
facet_grid(erpid ~ channel) +
scale_colour_brewer(type = 'qual', palette = 2) +
scale_y_reverse(limits = c(14, -7)) + #should apply to both projects!
labs(x = 'Time (ms)', y = 'Amplitude (muV)')
p
}
gapd1 <- erp_curves_mbi %>%
group_by(part, session, mbi_class, erpid, channel, time) %>%
summarise(n = n(), mean_amp = mean(amplitude)) %>%
ungroup() %>%
mutate(mbi_class = fct_rev(mbi_class))
p <- my_gaerp_plot_bymbi(gapd1, 'mbi_class') +
labs(title = 'GA ERP by project MBI grouping, 2 levels')
p
gapd2 <- erp_curves_mbi %>%
group_by(part, session, mbi_class3, erpid, channel, time) %>%
summarise(n = n(), mean_amp = mean(amplitude)) %>%
ungroup() %>%
mutate(mbi_class3 = fct_rev(mbi_class3))
p <- my_gaerp_plot_bymbi(gapd2, 'mbi_class3') +
labs(title = 'GA ERP by project MBI grouping, 3 levels')
p
gapd3 <- erp_curves_mbi %>%
group_by(part, session, mbigroup, erpid, channel, time) %>%
summarise(n = n(), mean_amp = mean(amplitude)) %>%
ungroup()
p <- my_gaerp_plot_bymbi(gapd3 %>% filter(mbigroup %in% c('low','high')),
'mbigroup') +
labs(title = 'GA ERP project-independent MBI grouping')
p
First we add the MBI data to ERP feature data.
# Merge into ERP feature data
pd <- left_join(erpfeat,
select(sbjd, c('subject', 'mbi_class', 'mbi_class3', 'MBIScore')),
by = 'subject') %>%
mutate(erpid_mbi = paste0(erpid,'_',mbi_class))
Here a two-level MBI classification is used. Cutoff at MBIScore == 1.5.
## Boxplot
erpid_boxplot <- function(pds, mbi_class_var){
p <- ggplot(filter(pds, variable == 'amplitude')) +
geom_boxplot(aes_string(x = mbi_class_var, y = 'value',
color = mbi_class_var)) +
geom_jitter(aes_string(x = mbi_class_var, y = 'value'),
alpha = 0.25, size = 0.5, width = 0.25) +
facet_grid(comp ~ channel) +
theme_light() +
theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
scale_colour_brewer(palette = "Set1") +
labs(x = 'Target stimulus', y = 'Amplitude (uV)',
title = sprintf('%s ERP amplitudes within MBI groups', unique(pds$erpid)))
p
}
erpid_arr <- unique(erpfeat$erpid)
p_list <- vector(length(erpid_arr), mode = 'list')
i = 1
for (id in erpid_arr){
p <- erpid_boxplot(pd %>%
filter(erpid == id),
'mbi_class')
p_list[i] <- list(p)
i <- i +1
}
cowplot::plot_grid(plotlist = p_list, ncol = 1)
#tmp <- pd %>% filter(erpid == id)
#erpid_boxplot(pd %>% filter(erpid == id), 'mbi_class')
Here a three-level MBI classification is used. Cutoffs at 1.49 and 3.49 as in Ahola et. al. (2005).
Sokka et. al. (2017) use also three groups but the MBI cutoffs are hard to find from that paper. However, Ahola et. al. (2005) define them as follows:
“Burnout and the dimensional scores were categorized as follows: No burnout (scores 0–1.49), mild burnout (scores 1.50–3.49), and severe burnout (scores 3.50–6).”
Reference: Ahola, K., Honkonen, T., Isometsä, E., Kalimo, R., Nykyri, E., Aromaa, A., & Lönnqvist, J. (2005). The relationship between job-related burnout and depressive disorders—results from the Finnish Health 2000 Study. Journal of affective disorders, 88(1), 55-62.
erpid_arr <- unique(erpfeat$erpid)
p_list <- vector(length(erpid_arr), mode = 'list')
i = 1
for (id in erpid_arr){
p <- erpid_boxplot(pd %>%
filter(erpid == id),
'mbi_class3')
p_list[i] <- list(p)
i <- i +1
}
cowplot::plot_grid(plotlist = p_list, ncol = 1)
Here is a parallel coordinates plot showing that (todo: only true fro seam):
high MBI subjects have very similar low amplitude ERPs accross erpid:s
some low MBI subjects show similar behavior
## Parallel coordinates plot
pdtmp <- pd %>%
mutate(comp_mbi = paste0(comp,'_',mbi_class),
comp_mbi3 = paste0(comp,'_',mbi_class3)) %>%
filter(erpid %in% c('CR','CRp1','CRp2','CRp3to9'))
p <- ggplot(filter(pdtmp, variable == 'amplitude')) +
geom_line(aes(x = erpid, y = value, color = mbi_class3, group = subject)) +
geom_point(aes(x = erpid, y = value, color = mbi_class3, group = subject),
size = 0.5) +
facet_grid(channel ~ comp_mbi3) +
scale_color_brewer(palette = 'Set1') +
theme_light() +
theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
labs(x = 'Target stimulus', y = 'Amplitude (uV)',
title = 'ERP amplitudes within MBI groups')
p